##  FISH 552, Introduction to R
##  Beginning code for Lecture 6 -- Data Manipulation 2

## This code is intended to give you the datasets (made up or otherwise)
#  that we will use during the in-class examples.  Add to this script the 
#  code we go over in class plus the Hands-On exercises.  

##########################
#data used with apply and tapply
#########################
# Build the example matrix `subj`: 20 patients (rows) by 5 years (columns),
# filled with standard-normal draws rounded to 3 decimal places.
rownm <- paste("Patient", seq_len(20))
colnm <- paste("Yr", seq_len(5))
subj <- matrix(
  round(rnorm(n = 100), digits = 3),
  ncol = 5,
  dimnames = list(rownm, colnm)
)
head(subj)

# MARGIN = 1 works over rows: mean across the 5 years for each patient
apply(X = subj, MARGIN = 1, FUN = mean)

# MARGIN = 2 works over columns: mean across the 20 patients for each year
apply(X = subj, MARGIN = 2, FUN = mean)

# Arguments after FUN are forwarded to it; here the 2.5% and 97.5%
# quantiles are computed for each patient.
apply(X = subj, MARGIN = 1, FUN = quantile, probs = c(0.025, 0.975))


# Create 3 vectors that correspond to fish lengths, weights, and sex
# (20 fish each, drawn at random with replacement).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# Note: `lengths` and `weights` share their names with base::lengths() and
# stats::weights(); the names are kept so the in-class examples still match.
lengths <- sample(1:100, size = 20, replace = TRUE)
genders <- sample(c("Male", "Female", "Unknown"),
                  size = 20, replace = TRUE)
weights <- sample(200:250, size = 20, replace = TRUE)

# Mean length within each sex category
tapply(X = lengths, INDEX = genders, FUN = mean)
# Mean length for every sex-by-weight combination; combinations with no
# fish show up as NA in the resulting table.
tapply(X = lengths, INDEX = list(genders, weights), FUN = mean)


##################################
#data used with order() and sort()
###################################
# Shuffle the integers 1 to 10 and store them in `cards`; the outer
# parentheses make R print the value as well as assign it.
(cards <- sample(seq_len(10)))
sort(cards)                     # values in increasing order
sort(cards, decreasing = TRUE)  # values in decreasing order
order(cards)                    # positions that would put cards in order
cards[order(cards)]             # indexing by order() gives the sorted values

# Data frame with shuffled IDs and one rounded normal deviate per row,
# displayed with its rows arranged by increasing ID.
YY <- data.frame(
  ID = sample(seq_len(10)),
  dev = round(rnorm(10), digits = 3)
)
YY[order(YY[["ID"]]), ]

# Ten draws from a normal distribution with mean 0 and std dev 1,
# rounded to 3 decimal places.
(rndNums <- round(rnorm(10), digits = 3))
# Build the component vectors first, then assemble them with `name = value`
# arguments so the data frame gets the column names laws, year and state.
# (Writing `laws <- ...` inside data.frame() assigns a workspace variable and
# leaves the column with a long deparsed name, so ZZ$laws would only work
# through partial matching.)
laws <- sample(1:3, replace = TRUE, size = 10)        # 10 draws from 1 to 3; values can repeat
year <- sample(2010:2012, replace = TRUE, size = 10)  # same as above, but from 2010 to 2012
state <- c("WA", "OR", "CA", "VT", "NY", "RI", "FL", "UT", "AZ", "TX")  # 10 different state abbreviations
ZZ <- data.frame(laws = laws, year = year, state = state)
ZZ
# Sort the rows by laws, breaking ties by year and then by state
ZZ[order(ZZ$laws, ZZ$year, ZZ$state), ]
##################################
#Merge
###################################
# Station 1: one observation for each time step from 1 to 100
station1 <- cbind(time1 = 1:100, data = rnorm(100))

# note: a common mistake when using seq is to get
# the length of the resulting vector wrong. Here it is
# length 21.
station2 <- cbind(time2 = seq(0, 100, 5),
                  category = sample(1:3, replace = TRUE, size = 21))

# Match rows on the two time columns; all = TRUE keeps times that appear in
# only one station, filling the other station's columns with NA.
stationX <- merge(station1, station2, by.x = "time1", by.y = "time2", all = TRUE)

# Times recorded at both stations (a bare `intersect` would only print the
# function's source code).
intersect(station1[, "time1"], station2[, "time2"])
##################################
#Working with dates and times
###################################

# One Date for every calendar day of 2014.
# Note how seq() accepts "day" as the interval when the endpoints are of
# type Date.  Nifty.
everyday <- seq(
  from = as.Date("2014-01-01"),
  to = as.Date("2014-12-31"),
  by = "day"
)

# Loads a revised do.csv data file that has a column called dateTime that
# contains the date and time for each data point in POSIX format.
# The file is read from the current working directory; TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
# NOTE(review): POSIXct is generally preferred over POSIXlt for data-frame
# columns, but the class is left unchanged here in case later code relies on
# the POSIXlt components -- confirm before switching.
do <- read.csv("do.csv", header = TRUE, colClasses = c(dateTime = "POSIXlt"))